package com.zzyl.common.utils;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;

/**
 * Utility for extracting the plain text of a PDF with Apache PDFBox.
 *
 * <p>Extraction happens in three steps:
 * <ol>
 *   <li>load the PDF bytes into a {@link PDDocument};</li>
 *   <li>create a {@link PDFTextStripper};</li>
 *   <li>let the stripper pull the text out of the document.</li>
 * </ol>
 */

public class PDFUtil {

    /**
     * Reads a PDF from the given stream and returns its text content.
     *
     * <p>The stream is always closed by this method, whether extraction
     * succeeds or fails, so the caller must not reuse it afterwards.
     *
     * @param is byte input stream of the PDF file to parse; must not be null
     * @return the text extracted from the document
     * @throws NullPointerException if {@code is} is null
     * @throws RuntimeException wrapping the underlying {@link IOException} when the
     *         PDF cannot be loaded or read, or when closing a resource fails
     */
    public static String pdfToString(InputStream is) {
        if (is == null) {
            throw new NullPointerException("is must not be null");
        }
        // try-with-resources closes the document first and then the stream, even when
        // loading fails. The previous manual finally block dereferenced a null document
        // in that case, which hid the real IOException and leaked the stream.
        try (InputStream in = is;
             PDDocument pdDocument = PDDocument.load(in)) {
            PDFTextStripper pdfTextStripper = new PDFTextStripper();
            return pdfTextStripper.getText(pdDocument);
        } catch (IOException e) {
            throw new RuntimeException("Failed to extract text from PDF", e);
        }
    }

    /**
     * Manual smoke test: prints the text of a PDF to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the PDF to read,
     *             defaulting to {@code D:/123.pdf} when absent
     * @throws FileNotFoundException if the file cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        String path = args.length > 0 ? args[0] : "D:/123.pdf";
        // pdfToString takes ownership of the stream and closes it.
        FileInputStream fileInputStream = new FileInputStream(path);
        String content = pdfToString(fileInputStream);
        System.out.println(content);
    }

}
